OHI British Columbia | OHI Science | Citation policy
knitr::opts_chunk$set(fig.width = 6, fig.height = 4, fig.path = 'figs/',
echo = TRUE, message = FALSE, warning = FALSE)
library(raster)
library(data.table)
library(sf)
source('https://raw.githubusercontent.com/oharac/src/master/R/common.R') ###
### includes library(tidyverse); library(stringr); dir_M points to ohi directory
dir_git <- '~/github/spp_risk_dists'
### goal specific folders and info
dir_setup <- file.path(dir_git, 'data_setup')
dir_data <- file.path(dir_git, 'data')
dir_o_anx <- file.path(dir_O, 'git-annex/spp_risk_dists')
source(file.path(dir_git, 'data_setup/api_fxns.R'))Create a set of maps of the distribution of biodiversity intactness - all species assessed and mapped by IUCN. These maps are generated at 0.10° lat/long resolution. These maps will be generated using all available species:
A selection of these maps will be generated for taxonomic groups and range sizes in a separate Rmd.
Future iterations may include:
IUCN Red List spatial data download IUCN Red List API Gina Ralph (IUCN)
In data_setup we have calculated, for each taxonomic group, cell values for mean risk, var risk, n_spp risk, n threatened, mean trend, n_spp trend. Here we bring those data frames all together to calculate these values across all assessed species.
Note: reconstructing the total mean per cell from the group means is straightforward: \[\bar x_T = \frac{1}{\sum_{g=1}^G n_g} \sum_{g=1}^G (n_g \bar x_g)\] but reconstructing the variance is more complex. Here is the derivation starting from sample variance of the total data set: \[\begin{align*} s_T^2 &= \frac{1}{n_T - 1} \sum_{i=1}^{n_T}(x_i - \bar x_T)^2\\ &= \frac{1}{n_T - 1} \left( \sum_{i=1}^{n_T} x_i^2 - 2 \sum_{i=1}^{n_T} x_i \bar x_T + \sum_{i=1}^{n_T} \bar x_T^2 \right)\\ &= \frac{1}{n_T - 1} \left( \sum_{i=1}^{n_T} x_i^2 - n_T \bar x_T^2 \right)\\ \Longrightarrow \hspace{5pt} (n_T - 1) s_T^2 + n_T \bar x_T^2 &= \sum_{i=1}^{n_T} x_i^2 &\text{(identity 1)}\\ (n_T - 1) s_T^2 + n_T \bar x_T^2 &= \sum_{i=1}^{n_T} x_i^2 = \sum_{j=1}^{n_{gp1}} x_j^2 + \sum_{k=1}^{n_{gp2}} x_k^2 + ... &\text{(decompose into groups)}\\ &= (n_{gp1} - 1) s_{gp1}^2 + n_{gp1} \bar x_{gp1}^2 + (n_{gp2} - 1) s_{gp2}^2 + n_{gp2} \bar x_{gp2}^2 + ... &\text{(sub in identity 1)}\\ &= \sum_{gp = 1}^{Gp} \left((n_{gp} - 1) s_{gp}^2 + n_{gp} \bar x_{gp}^2 \right)\\ \Longrightarrow s_T^2 &= \frac{1}{n_T - 1} \sum_{gp = 1}^{Gp} \left[(n_{gp} - 1) s_{gp}^2 + n_{gp} \bar x_{gp}^2 \right] - \frac{ n_T}{n_T - 1} \bar x_T^2 \end{align*}\]Because of file sizes, the intermediate files will be stored outside of GitHub.
cell_summary_file <- file.path(dir_o_anx, 'cell_summary_unweighted.csv')
reload <- FALSE
if(!file.exists(cell_summary_file) | reload == TRUE) {
dir_taxa_summaries <- file.path(dir_o_anx, 'taxa_summaries')
sum_files <- list.files(dir_taxa_summaries,
pattern = sprintf('cell_sum_%s.csv', api_version),
full.names = TRUE)
message('going into the first mclapply...')
ptm <- system.time({
cell_values_all <- parallel::mclapply(sum_files, mc.cores = 32,
FUN = function(x) {
read_csv(x, col_types = 'dddiidi')
}) %>%
bind_rows()
}) ### end of system.time
message('... processing time ', ptm[3], ' sec')
### chunk into smaller bits for mclapply usage in the summarizing step. Use mclapply to chunk,
### then pass result to mclapply to calculate weighted average values
chunksize <- 10000
cell_ids <- cell_values_all$cell_id %>% unique() %>% sort()
n_chunks <- ceiling(length(cell_ids) / chunksize)
message('going into the second mclapply...')
# system.time({
### standard: 3 sec for 5, 12.5 sec for 20... 32 sec for 50... 479 chunks ~ 6 min.
### mclapply: 4 sec for 50, with 32 cores. 52 sec for the whole shebang.
ptm <- system.time({
cell_vals_list <- parallel::mclapply(1:n_chunks, mc.cores = 12,
FUN = function(x) { ### x <- 1
btm_i <- (x - 1) * chunksize + 1
top_i <- min(x * chunksize, length(cell_ids))
ids <- cell_ids[btm_i:top_i]
df <- cell_values_all %>%
filter(cell_id %in% ids)
}) ### end of mclapply
}) ### end of system.time
message('... processing time ', ptm[3], ' sec')
message('going into the third mclapply...')
ptm <- system.time({
cell_summary_list <- parallel::mclapply(cell_vals_list, mc.cores = 24,
FUN = function(x) {
### x <- cell_vals_list[[250]]
y <- x %>%
group_by(cell_id) %>%
summarize(mean_risk = sum(mean_risk * n_spp_risk) / sum(n_spp_risk),
n_spp_risk = sum(n_spp_risk), ### n_total
n_spp_threatened = sum(n_spp_threatened, na.rm = TRUE),
pct_threatened = n_spp_threatened / n_spp_risk,
mean_trend = sum(mean_trend * n_spp_trend, na.rm = TRUE) / sum(n_spp_trend, na.rm = TRUE),
n_spp_trend = sum(n_spp_trend, na.rm = TRUE))
z <- x %>%
mutate(mean_risk_g = mean_risk, ### protect it
n_spp_risk_g = n_spp_risk, ### protect it
var_risk_g = ifelse(var_risk < 0 | is.na(var_risk) | is.infinite(var_risk),
0, var_risk)) %>%
### any non-valid variances probably due to only one observation, which
### results in corrected var of infinity... set to zero and proceed!
group_by(cell_id) %>%
summarize(mean_risk_t = sum(mean_risk_g * n_spp_risk_g) / sum(n_spp_risk_g),
n_spp_risk_t = sum(n_spp_risk_g), ### n_total
var_risk = 1 / (n_spp_risk_t - 1) *
(sum(var_risk_g * (n_spp_risk_g - 1) + n_spp_risk_g * mean_risk_g^2) -
n_spp_risk_t * mean_risk_t^2),
var_risk = ifelse(var_risk < 0, 0, var_risk),
var_risk = ifelse(is.nan(var_risk) | is.infinite(var_risk), NA, var_risk)) %>%
### get rid of negative (tiny) variances and infinite variances
select(cell_id, var_risk)
yz <- left_join(y, z, by = 'cell_id')
return(yz)
}) ### end of mclapply
}) ### end of system.time
message('... processing time ', ptm[3], ' sec')
message('done!')
cell_summary <- cell_summary_list %>%
bind_rows()
write_csv(cell_summary, cell_summary_file)
} else {
message('Reading existing cell summary file: ', cell_summary_file)
cell_summary <- read_csv(cell_summary_file, col_types = 'ddiiddid')
}reload <- FALSE
rast_base <- raster(ext = extent(c(-180, 180, -90, 90)), res = 0.1)
values(rast_base) <- 1:length(rast_base)
land_poly <- sf::read_sf(file.path(dir_git, 'spatial/ne_10m_land/ne_10m_land.shp'))
map_plot_file <- file.path(dir_git, 'figs/fig1_map_mean_risk.png')
if(!file.exists(map_plot_file) | reload == TRUE) {
mean_rast <- subs(rast_base, cell_summary, by = 'cell_id', which = 'mean_risk')
writeRaster(mean_rast, file.path(dir_git, 'output', 'mean_risk_raster_010deg.tif'),
overwrite = TRUE)
### mean_rast <- raster(file.path(dir_git, 'output', 'mean_risk_raster_010deg.tif'))
mean_df <- mean_rast %>%
# aggregate(fact = 4) %>%
rasterToPoints() %>%
as.data.frame() %>%
setNames(c('long', 'lat', 'value'))
message('ggplotting the global map...')
x <- ggplot(mean_df) +
geom_raster(aes(long, lat, fill = value)) +
geom_sf(data = land_poly, aes(geometry = geometry), fill = 'grey96', color = 'grey40', size = .10) +
ggtheme_map() +
scale_fill_gradientn(colors = c('green4', 'lightyellow', 'red2', 'red3', 'red4', 'purple4'),
limits = c(0, 1),
labels = c('LC', 'NT', 'VU', 'EN', 'CR', 'EX'),
breaks = c( 0.0, 0.2, 0.4, 0.6, 0.8, 1.0)) +
scale_x_continuous(expand = c(0, 0)) +
scale_y_continuous(expand = c(0, 0)) +
labs(title = 'Mean risk',
fill = 'Mean risk')
ggsave(filename = map_plot_file, width = 6, height = 4, units = 'in', dpi = 300)
message('now ggplotting the regional maps...')
x <- ggplot(mean_df) +
geom_raster(aes(long, lat, fill = value)) +
geom_sf(data = land_poly, aes(geometry = geometry), fill = 'grey96', color = 'grey40', size = .10) +
ggtheme_map() +
scale_fill_gradientn(colors = c('green4', 'lightyellow', 'red2', 'red3', 'red4', 'purple4'),
limits = c(0, 1),
labels = c('LC', 'NT', 'VU', 'EN', 'CR', 'EX'),
breaks = c( 0.0, 0.2, 0.4, 0.6, 0.8, 1.0)) +
coord_sf(xlim = c(-180, -110), ylim = c(30, 63)) +
scale_x_continuous(expand = c(0, 0)) +
scale_y_continuous(expand = c(0, 0)) +
labs(title = 'Mean risk (NE Pacific)',
fill = 'Mean risk')
ggsave(filename = str_replace(map_plot_file, '.png', '_ne_pac.png'),
width = 6, height = 4, units = 'in', dpi = 300)
x <- ggplot(mean_df) +
geom_raster(aes(long, lat, fill = value)) +
geom_sf(data = land_poly, aes(geometry = geometry), fill = 'grey96', color = 'grey40', size = .10) +
ggtheme_map() +
scale_fill_gradientn(colors = c('green4', 'lightyellow', 'red2', 'red3', 'red4', 'purple4'),
limits = c(0, 1),
labels = c('LC', 'NT', 'VU', 'EN', 'CR', 'EX'),
breaks = c( 0.0, 0.2, 0.4, 0.6, 0.8, 1.0)) +
coord_sf(xlim = c(-20, 45), ylim = c(28, 48)) +
scale_x_continuous(expand = c(0, 0)) +
scale_y_continuous(expand = c(0, 0)) +
labs(title = 'Mean risk (Mediterranean)',
fill = 'Mean risk')
ggsave(filename = str_replace(map_plot_file, '.png', '_med.png'),
width = 6, height = 4, units = 'in', dpi = 300)
x <- ggplot(mean_df) +
geom_raster(aes(long, lat, fill = value)) +
geom_sf(data = land_poly, aes(geometry = geometry), fill = 'grey96', color = 'grey40', size = .10) +
ggtheme_map() +
scale_fill_gradientn(colors = c('green4', 'lightyellow', 'red2', 'red3', 'red4', 'purple4'),
limits = c(0, 1),
labels = c('LC', 'NT', 'VU', 'EN', 'CR', 'EX'),
breaks = c( 0.0, 0.2, 0.4, 0.6, 0.8, 1.0)) +
coord_sf(xlim = c(-100, 15), ylim = c(-10, 33)) +
scale_x_continuous(expand = c(0, 0)) +
scale_y_continuous(expand = c(0, 0)) +
labs(title = 'Mean risk (mid-Atlantic)',
fill = 'Mean risk')
ggsave(filename = str_replace(map_plot_file, '.png', '_mid_atl.png'),
width = 6, height = 4, units = 'in', dpi = 300)
x <- ggplot(mean_df) +
geom_raster(aes(long, lat, fill = value)) +
geom_sf(data = land_poly, aes(geometry = geometry), fill = 'grey96', color = 'grey40', size = .10) +
ggtheme_map() +
scale_fill_gradientn(colors = c('green4', 'lightyellow', 'red2', 'red3', 'red4', 'purple4'),
limits = c(0, 1),
labels = c('LC', 'NT', 'VU', 'EN', 'CR', 'EX'),
breaks = c( 0.0, 0.2, 0.4, 0.6, 0.8, 1.0)) +
coord_sf(xlim = c(75, 150), ylim = c(-30, 30)) +
scale_x_continuous(expand = c(0, 0)) +
scale_y_continuous(expand = c(0, 0)) +
labs(title = 'Mean risk (Coral Triangle)',
fill = 'Mean risk')
ggsave(filename = str_replace(map_plot_file, '.png', '_se_asia.png'),
width = 6, height = 4, units = 'in', dpi = 300)
} else {
message('Map exists: ', map_plot_file)
}
map_plot_file <- file.path(dir_git, 'figs/fig1_map_var_risk.png')
if(!file.exists(map_plot_file) | reload == TRUE) {
var_rast <- subs(rast_base, cell_summary, by = 'cell_id', which = 'var_risk')
writeRaster(var_rast, file.path(dir_git, 'output', 'var_risk_raster_010deg.tif'),
overwrite = TRUE)
var_df <- var_rast %>%
# aggregate(fact = 4) %>%
rasterToPoints() %>%
as.data.frame() %>%
setNames(c('long', 'lat', 'value'))
x <- ggplot(var_df) +
geom_raster(aes(long, lat, fill = value)) +
geom_sf(data = land_poly, aes(geometry = geometry), fill = 'grey96', color = 'grey40', size = .10) +
ggtheme_map() +
scale_fill_gradientn(colors = c('green3', 'lightyellow', 'red3', 'purple4'),
values = c(0, .25, .5, 1)) +
scale_x_continuous(expand = c(0, 0)) +
scale_y_continuous(expand = c(0, 0)) +
labs(title = 'Variance of risk',
fill = 'Var of risk')
ggsave(filename = map_plot_file,
width = 6, height = 4, units = 'in', dpi = 300)
} else {
message('Map exists: ', map_plot_file)
}map_plot_file <- file.path(dir_git, 'figs/fig_SI_map_n_risk.png')
if(!file.exists(map_plot_file) | reload == TRUE) {
n_spp_rast <- subs(rast_base, cell_summary, by = 'cell_id', which = 'n_spp_risk')
writeRaster(n_spp_rast, file.path(dir_git, 'output', 'n_spp_risk_raster_010deg.tif'),
overwrite = TRUE)
n_spp_df <- n_spp_rast %>%
# aggregate(fact = 4) %>%
rasterToPoints() %>%
as.data.frame() %>%
setNames(c('long', 'lat', 'value')) %>%
mutate(log_value = log(value))
x <- ggplot(n_spp_df) +
geom_raster(aes(long, lat, fill = log_value)) +
geom_sf(data = land_poly, aes(geometry = geometry), fill = 'grey96', color = 'grey40', size = .10) +
ggtheme_map() +
scale_fill_gradientn(colors = c('lightblue', 'blue1', 'blue2', 'blue3', 'blue4', 'purple4'),
values = c(0, .5, .7, .8, .9, 1)) +
scale_x_continuous(expand = c(0, 0)) +
scale_y_continuous(expand = c(0, 0)) +
labs(title = 'Number of assessed species (ln transformed)',
fill = 'ln N')
ggsave(filename = map_plot_file,
width = 6, height = 4, units = 'in', dpi = 300)
} else {
message('Map exists: ', map_plot_file)
}map_plot_file <- file.path(dir_git, 'figs/fig2_map_pct_threatened.png')
if(!file.exists(map_plot_file) | reload == TRUE) {
n_threat_rast <- subs(rast_base, cell_summary, by = 'cell_id', which = 'n_spp_threatened')
writeRaster(n_threat_rast, file.path(dir_git, 'output', 'n_threat_raster_010deg.tif'),
overwrite = TRUE)
### n_threat_rast <- raster(file.path(dir_git, 'output', 'n_threat_raster_010deg.tif'))
# n_threat_df <- n_threat_rast %>%
# # aggregate(fact = 4) %>%
# rasterToPoints() %>%
# as.data.frame() %>%
# setNames(c('long', 'lat', 'value')) %>%
# mutate(log_value = log(value))
# x <- ggplot(n_threat_df) +
# geom_raster(aes(long, lat, fill = value)) +
# geom_sf(data = land_poly, aes(geometry = geometry), fill = 'grey96', color = 'grey40', size = .10) +
# ggtheme_map() +
# scale_fill_gradientn(colors = c('grey90', 'coral', 'red1', 'red3', 'red4', 'purple4'),
# values = c(0, .5, .7, .8, .9, 1)) +
# scale_x_continuous(expand = c(0, 0)) +
# scale_y_continuous(expand = c(0, 0)) +
# labs(title = 'Number of threatened species (VU, EN, CR) (ln transformed)',
# fill = 'ln N')
#
pct_threat_rast <- subs(rast_base, cell_summary, by = 'cell_id', which = 'pct_threatened')
writeRaster(pct_threat_rast, file.path(dir_git, 'output', 'pct_threat_raster_010deg.tif'),
overwrite = TRUE)
### n_threat_rast <- raster(file.path(dir_git, 'output', 'n_threat_raster_010deg.tif'))
pct_threat_df <- pct_threat_rast %>%
# aggregate(fact = 4) %>%
rasterToPoints() %>%
as.data.frame() %>%
setNames(c('long', 'lat', 'value'))
x <- ggplot(pct_threat_df) +
geom_raster(aes(long, lat, fill = value)) +
geom_sf(data = land_poly, aes(geometry = geometry), fill = 'grey96', color = 'grey40', size = .10) +
ggtheme_map() +
scale_fill_gradientn(colors = c('grey90', 'coral', 'red2', 'red4', 'purple4'),
limits = c(0, 1),
values = c(0, .25, .5, .6, 1)) +
coord_sf() +
scale_x_continuous(expand = c(0, 0)) +
scale_y_continuous(expand = c(0, 0)) +
labs(title = 'Proportion of threatened species (VU, EN, CR)',
fill = 'proportion')
ggsave(filename = map_plot_file,
width = 6, height = 4, units = 'in', dpi = 300)
x <- ggplot(pct_threat_df, aes(x = value)) +
ggtheme_plot() +
geom_density(alpha = .3, size = .25, color = 'red') +
theme(axis.text.y = element_blank(),
axis.title.y = element_blank()) +
labs(title = 'dist of % species in VU, EN, CR')
quantile(pct_threat_df$value, c(0.01, 0.05, seq(0.10, 1.00, 0.10)))
} else {
message('Map exists: ', map_plot_file)
}## 1% 5% 10% 20% 30% 40% 50%
## 0.0000000 0.0000000 0.0625000 0.1818182 0.2608696 0.3035714 0.3333333
## 60% 70% 80% 90% 100%
## 0.3611111 0.3797468 0.4000000 0.4285714 1.0000000
map_plot_file <- file.path(dir_git, 'figs/fig_SI_map_mean_trend.png')
if(!file.exists(map_plot_file) | reload == TRUE) {
trend_rast <- subs(rast_base, cell_summary, by = 'cell_id', which = 'mean_trend')
writeRaster(trend_rast, file.path(dir_git, 'output', 'trend_raster_010deg.tif'),
overwrite = TRUE)
trend_df <- trend_rast %>%
# aggregate(fact = 4) %>%
rasterToPoints() %>%
as.data.frame() %>%
setNames(c('long', 'lat', 'value'))
trend_range <-range(trend_df$value)
trend_0 <- -trend_range[1] / (trend_range[2] - trend_range[1])
x <- ggplot(trend_df) +
geom_raster(aes(long, lat, fill = value)) +
geom_sf(data = land_poly, aes(geometry = geometry), fill = 'grey96', color = 'grey40', size = .10) +
ggtheme_map() +
scale_fill_gradientn(colors = c('green4', 'green3', 'grey90', 'red3', 'purple4'),
values = c(0, trend_0/2, trend_0, 1 - (1 - trend_0)/2, 1)) +
scale_x_continuous(expand = c(0, 0)) +
scale_y_continuous(expand = c(0, 0)) +
labs(title = 'Trend in risk',
fill = 'trend')
ggsave(filename = map_plot_file,
width = 6, height = 4, units = 'in', dpi = 300)
} else {
message('Map exists: ', map_plot_file)
}map_plot_file <- file.path(dir_git, 'figs/fig_SI_map_n_trend.png')
if(!file.exists(map_plot_file) | reload == TRUE) {
n_trend_rast <- subs(rast_base, cell_summary, by = 'cell_id', which = 'n_spp_trend')
writeRaster(n_trend_rast, file.path(dir_git, 'output', 'n_trend_raster_010deg.tif'),
overwrite = TRUE)
n_trend_df <- n_trend_rast %>%
# aggregate(fact = 4) %>%
rasterToPoints() %>%
as.data.frame() %>%
setNames(c('long', 'lat', 'value')) %>%
mutate(log_value = log(value))
x <- ggplot(n_trend_df) +
geom_raster(aes(long, lat, fill = value)) +
geom_sf(data = land_poly, aes(geometry = geometry), fill = 'grey96', color = 'grey40', size = .10) +
ggtheme_map() +
scale_fill_gradientn(colors = c('lightblue', 'blue1', 'blue2', 'blue3', 'blue4', 'purple4'),
values = c(0, .5, .7, .8, .9, 1)) +
scale_x_continuous(expand = c(0, 0)) +
scale_y_continuous(expand = c(0, 0)) +
labs(title = 'Number of trend-assessed species (ln transformed)',
fill = 'ln N')
ggsave(filename = map_plot_file,
width = 6, height = 4, units = 'in', dpi = 300)
} else {
message('Map exists: ', map_plot_file)
}